///
///	@file 	url.cpp
/// @brief 	Url manipulation routines
///	@overview Miscellaneous routines to parse and escape URLs.
//
/////////////////////////////////// Copyright //////////////////////////////////
//
//	@copy	default.g
//	
//	Copyright (c) Mbedthis Software LLC, 2003-2007. All Rights Reserved.
//	Portions Copyright (c) GoAhead Software, 1995-2000. All Rights Reserved.
//	
//	This software is distributed under commercial and open source licenses.
//	You may use the GPL open source license described below or you may acquire 
//	a commercial license from Mbedthis Software. You agree to be fully bound 
//	by the terms of either license. Consult the LICENSE.TXT distributed with 
//	this software for full details.
//	
//	This software is open source; you can redistribute it and/or modify it 
//	under the terms of the GNU General Public License as published by the 
//	Free Software Foundation; either version 2 of the License, or (at your 
//	option) any later version. See the GNU General Public License for more 
//	details at: http://www.mbedthis.com/downloads/gplLicense.html
//	
//	This program is distributed WITHOUT ANY WARRANTY; without even the 
//	implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
//	
//	This GPL license does NOT permit incorporating this software into 
//	proprietary programs. If you are unable to comply with the GPL, you must
//	acquire a commercial license to use this software. Commercial licenses 
//	for this software and support services are available from Mbedthis 
//	Software at http://www.mbedthis.com 
//	
//	@end
//
////////////////////////////////// Includes ////////////////////////////////////

#include	"shared.h"

///////////////////////////////////// Locals ///////////////////////////////////
//
//	Standard HTTP status codes and their reason phrases.
//
//	maGetHttpErrorMsg() scans this table from the top and stops at the first
//	entry whose code is zero, so the { 0, 0 } sentinel must stay last.
//
MaHttpError MaHttpErrors[] = {
	{ 100, "Continue" },
	{ 200, "OK" },
	{ 201, "Created" },
	{ 202, "Accepted" },
	{ 204, "No Content" },
	{ 205, "Reset Content" },
	{ 206, "Partial Content" },
	{ 301, "Moved Permanently" },
	{ 302, "Moved Temporarily" },
	{ 304, "Not Modified" },
	{ 400, "Bad Request" },
	{ 401, "Unauthorized" },
	{ 402, "Payment Required" },
	{ 403, "Forbidden" },
	{ 404, "Not Found" },
	{ 405, "Method Not Allowed" },
	{ 408, "Request Time-out" },
	{ 411, "Length Required" },
	{ 413, "Request Entity Too Large" },
	{ 414, "Request-URI Too Large" },
	{ 415, "Unsupported Media Type" },
	{ 416, "Requested Range Not Satisfiable" },
	{ 500, "Internal Server Error" },
	{ 501, "Not Implemented" },
	{ 502, "Bad Gateway" },
	{ 503, "Service Unavailable" },
	{ 504, "Gateway Time-out" },
	{ 505, "Http Version Not Supported" },
	{ 507, "Insufficient Storage" },

	//
	//	Proprietary codes (used internally) when the connection to the client 
	//	is severed. These are not standard HTTP status codes.
	//
	{ 550, "Comms Error" },
	{ 551, "General Client Error" },

	//	End-of-table sentinel
	{ 0,   0 }
};

//
//	Character escape matching codes. Generated by charGen.
//
//	The table is indexed by the unsigned value of a character (0-255). Each 
//	entry is a bit mask that maUrlEncode(), maEscapeCmd() and maEscapeHtml() 
//	test against MPR_HTTP_ESCAPE_PATH, MPR_HTTP_ESCAPE_SHELL and 
//	MPR_HTTP_ESCAPE_HTML respectively to decide whether the character needs 
//	escaping in that context.
//
//	NOTE(review): the MPR_HTTP_ESCAPE_* bit values are defined outside this 
//	file; presumably the table has to be regenerated if they change -- confirm.
//
static uchar charMatch[256] = {
	 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 4, 4, 4, 4, 4,
	 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
	 4, 0, 6, 5, 2, 4, 3, 2, 3, 3, 2, 0, 0, 0, 0, 0,
	 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 0, 7, 6,
	 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 0,
	 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 2, 4,
	 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
	 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
	 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
	 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
	 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
	 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
	 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
	 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 
};
 
////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////// Url //////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////

//
//	Construct an empty URL object. No parse buffer is allocated, every string
//	field is null and the port is -1 until parse() is called.
//
MaUrl::MaUrl()
{
	proto = 0;
	host = 0;
	uri = 0;
	ext = 0;
	query = 0;
	port = -1;
	parsedUrlBuf = 0;
}

////////////////////////////////////////////////////////////////////////////////

//
//	Release the buffer allocated by parse(). The buffer pointer is only passed
//	to mprFree() when it is set, matching the guard used in parse(), so an
//	object that was never parsed is destroyed without touching the allocator.
//
MaUrl::~MaUrl()
{
	if (parsedUrlBuf) {
		mprFree(parsedUrlBuf);
		parsedUrlBuf = 0;
	}
}

////////////////////////////////////////////////////////////////////////////////
//
//	Max size of the port specification in a URL
//
#define MAX_PORT_LEN 6

//
//	Crack a URL into its proto, host, port, uri, ext and query fields.
//	Algorithm originally in the GoAhead WebServer.
//
//	A single buffer (parsedUrlBuf) is allocated to hold a private copy of the
//	URL plus scratch space for the host and port strings. The string fields 
//	point either into that buffer or at constant defaults, so they are only 
//	valid until the next call to parse() or until the object is destroyed.
//
//	Fields missing from the URL default to: proto "http", host "localhost",
//	port 80, uri "/", query "" and ext "html". Only URLs that begin with
//	"http://" are treated as absolute. Anything else is taken to be a URI 
//	with an optional query string and fragment. The fragment is discarded.
//
//	@param url	URL to parse. Must not be null or empty. The caller's string 
//				is not modified. It may point into the buffer of a previous 
//				parse() on this object.
//	@return 	Always zero.
//
int MaUrl::parse(char *url)
{
	char	*tok, *cp, *portStr, *last_delim, *hostbuf, *portbuf, *newBuf;
	char 	*htmlExt = "html";
	int		c, len, ulen;

	mprAssert(url && *url);

	ulen = strlen(url);

	//
	//	Allocate a single buffer to hold all the cracked fields.
	//	Store host, port and url strings (3 nulls). Layout:
	//		[0 .. ulen]					copy of the url
	//		[ulen+1 .. 2*ulen+1]		host (hostbuf)
	//		[2*ulen+2 .. len-1]			port (portbuf, MAX_PORT_LEN + null)
	//
	//	The url is copied BEFORE the previous buffer is released so that a 
	//	caller may safely pass in a field from an earlier parse().
	//
	len = ulen * 2 + MAX_PORT_LEN + 3;
	newBuf = (char*) mprMalloc(len * sizeof(char));
	strcpy(newBuf, url);
	if (parsedUrlBuf) {
		mprFree(parsedUrlBuf);
	}
	parsedUrlBuf = newBuf;
	portbuf = &parsedUrlBuf[len - MAX_PORT_LEN - 1];
	hostbuf = &parsedUrlBuf[ulen+1];
	url = parsedUrlBuf;

	//
	//	Defaults for missing URL fields
	//
	strcpy(portbuf, "80");
	portStr = portbuf;
	uri = "/";
	proto = "http";
	host = "localhost";
	query = "";
	ext = htmlExt;

	if (strncmp(url, "http://", 7) == 0) {
		//
		//	Terminate the protocol by overwriting the ':' of "://"
		//
		tok = &url[7];
		tok[-3] = '\0';
		proto = url;
		host = tok;

		//
		//	Scan the authority for an optional ":port". The ':' is replaced
		//	by a null so the host is terminated in place.
		//
		for (cp = tok; *cp; cp++) {
			if (*cp == '/') {
				break;
			}
			if (*cp == ':') {
				*cp++ = '\0';
				portStr = cp;
				tok = cp;
			}
		}
		if ((cp = strchr(tok, '/')) != NULL) {
			//
			//	The '/' that starts the path must stay in the uri, so the host
			//	and port are copied out to their own storage while the '/' is
			//	temporarily replaced by a null.
			//
			c = *cp;
			*cp = '\0';
			mprStrcpy(hostbuf, ulen + 1, host);
			if (portStr != portbuf) {
				//	Only copy an explicit port. Otherwise portbuf already 
				//	holds the default and would be copied onto itself.
				mprStrcpy(portbuf, MAX_PORT_LEN, portStr);
			}
			*cp = c;
			host = hostbuf;
			portStr = portbuf;
			uri = cp;
			tok = cp;
		}
		//
		//	With no '/' after the authority, uri keeps its "/" default and
		//	tok is left pointing at the host (or port) text so that a query
		//	or fragment that directly follows it is still split off below.
		//

	} else {
		uri = url;
		tok = url;
	}

	//
	//	Split off the query string. The uri is already set correctly and is
	//	terminated in place by the null written over the '?'. It must not be
	//	reassigned from tok here, as tok may be the host or port text.
	//
	if ((cp = strchr(tok, '?')) != NULL) {
		*cp++ = '\0';
		query = cp;
		tok = query;
	}

	//
	//	Split off the fragment identifier. It is discarded. The null 
	//	terminates whichever field (uri, query, host or port) precedes it.
	// 
	if ((cp = strchr(tok, '#')) != NULL) {
		*cp = '\0';
	}

	//
	//	Determine the extension: the text after the last '.' provided that 
	//	the '.' is in the final path segment. Otherwise ext keeps the "html"
	//	default assigned above, which also covers an empty uri and a uri 
	//	that ends with '/'.
	//
	//	FUTURE -- this logic could be improved
	//
	if ((cp = strrchr(uri, '.')) != NULL) {
		last_delim = strrchr(uri, '/');
		if (last_delim == NULL || last_delim < cp) {
			ext = cp + 1;
#if WIN
			//	Lower-cases the extension in place within the uri
			mprStrLower(ext);
#endif
		}
	}

	port = atoi(portStr);
	return 0;
}

////////////////////////////////////////////////////////////////////////////////
//
//	Encode a URL. The means encode special characters with hex equivalents.
//	This routine may also be used to encode query variables.
// 

char *maUrlEncode(char *buf, int len, char *uri, bool isAbsolute)
{
	static const char hexTable[] = "0123456789abcdef";
	uchar	c;
	char	*ip,  *op, *endp;
	
	mprAssert(buf);
	mprAssert(uri);
	mprAssert(buf != uri);

	ip = uri;
    if (!isAbsolute) {
		char *cp, *delim;
		cp = strchr(ip, ':');
		delim = strchr(ip, '/');
		if (cp && (!delim || cp < delim)) {
			*buf++ = '.';
			*buf++ = '/';
		}
    }

	op = buf;
	endp = &buf[len - 4];
    while ((c = (uchar) (*uri++)) != 0 && op < endp) {
		if (charMatch[c] & MPR_HTTP_ESCAPE_PATH) {
			*op++ = '%';
			*op++ = hexTable[c >> 4];
			*op++ = hexTable[c & 0xf];
		} else {
			*op++ = c;
		}
    }
    *op = '\0';
	return op;
}

////////////////////////////////////////////////////////////////////////////////
//
//	Decode a URL (or part thereof). Can work insitu (ie. buf == uri).
//	This routine may also be used to decode query variables.
//
//	"%xx" sequences with two hex digits are converted to the character they
//	encode. A '%' that is not followed by two hex digits is copied unchanged.
//	The output is null terminated and truncated to fit in len bytes.
//
//	@param buf			Destination buffer (may be the same as uri).
//	@param len			Size of buf in bytes.
//	@param uri			Null terminated string to decode.
//	@param isUrl		If true, an encoded '/' (%2f) is rejected.
//	@param multipleArgs	If true, '+' is decoded as a space.
//	@return				buf on success. Zero if len is not positive or if the
//						input contains an encoded null (%00) or, when isUrl 
//						is set, an encoded '/'. On a decode error no 
//						terminator is written to buf.
// 

char *maUrlDecode(char *buf, int len, char *uri, bool isUrl, bool multipleArgs)
{
	char	*ip,  *op;
	int		num, i, c;
	
	mprAssert(buf);
	mprAssert(uri);
	mprAssert(len > 0);

	if (len <= 0) {
		//	No room for even the terminating null
		return 0;
	}

	//
	//	Reserve one byte for the terminating null
	//
	len -= 1;
	for (op = buf, ip = uri; *ip && len > 0; ip++, op++) {
		if (*ip == '+' && multipleArgs) {
			*op = ' ';

		//
		//	The <ctype.h> functions require a value representable as an
		//	unsigned char, so the (possibly signed) chars are cast first.
		//
		} else if (*ip == '%' && isxdigit((unsigned char) ip[1]) && 
				isxdigit((unsigned char) ip[2])) {
			ip++;
			num = 0;
			for (i = 0; i < 2; i++, ip++) {
				c = tolower((unsigned char) *ip);
				if (c >= 'a' && c <= 'f') {
					num = (num * 16) + 10 + c - 'a';
				} else if (c >= '0' && c <= '9') {
					num = (num * 16) + c - '0';
				} else {
					mprError(MPR_L, MPR_LOG, "Bad chars in URL %s", uri);
					return 0;
				}
			}
			//
			//	An encoded null would truncate the decoded string. An encoded
			//	'/' is refused for URLs (presumably because it would add a 
			//	path separator that earlier processing did not see).
			//
			if (num == 0 || (num == 0x2f && isUrl)) {
				mprError(MPR_L, MPR_LOG, "Bad hex char in URL %x", num);
				return 0;
			}
			*op = (char) num;
			//	Step back one as the outer loop will advance ip again
			ip--;

		} else {
			*op = *ip;
		}
		len--;
	}
	*op = '\0';
	return buf;
}

////////////////////////////////////////////////////////////////////////////////

//
//	Escape a shell command
// 

char *maEscapeCmd(char *buf, int len, char *cmd, int escChar)
{
	uchar	c;
	char	*op, *endp;
	
	mprAssert(buf);
	mprAssert(cmd);
	mprAssert(buf != cmd);

	if (escChar == 0) {
		escChar = '\\';
	}
	op = buf;
	endp = &buf[len - 2];
    while ((c = (uchar) *cmd++) != 0 && op < endp) {
#if WIN || NW || OS2
		if ((c == '\r' || c == '\n') && *cmd != '\0') {
			c = ' ';
			continue;
		}
#endif
		if (charMatch[c] & MPR_HTTP_ESCAPE_SHELL) {
			*op++ = escChar;
		}
		*op++ = c;
    }
    *op = '\0';
	return op;
}

////////////////////////////////////////////////////////////////////////////////
//
//	Escape HTML to escape defined characters (prevent cross-site scripting).
//	The characters & < > # ( ) are replaced by their HTML entities. All other
//	characters are copied unchanged. The result is null terminated and 
//	truncated to fit in buf.
//
//	@param buf		Destination buffer.
//	@param buflen	Size of buf in bytes.
//	@param html		Null terminated text to escape.
//	@return			buf
//

char *maEscapeHtml(char *buf, int buflen, char *html)
{
	const char	*entity, *ep;
	char		*out, *limit;

	//
	//	Leave room for the biggest expansion (five characters) plus the null
	//
	out = buf;
	limit = &buf[buflen - 6];

	for (; *html != '\0' && out < limit; html++) {
		if ((charMatch[(uchar) *html] & MPR_HTTP_ESCAPE_HTML) == 0) {
			*out++ = *html;
			continue;
		}
		switch (*html) {
		case '&':
			entity = "&amp;";
			break;
		case '<':
			entity = "&lt;";
			break;
		case '>':
			entity = "&gt;";
			break;
		case '#':
			entity = "&#35;";
			break;
		case '(':
			entity = "&#40;";
			break;
		case ')':
			entity = "&#41;";
			break;
		default:
			entity = 0;
			break;
		}
		if (entity == 0) {
			//	The table flagged a character that has no entity here. 
			//	The character is dropped from the output.
			mprAssert(0);
			continue;
		}
		for (ep = entity; *ep; ep++) {
			*out++ = *ep;
		}
	}
	*out = '\0';
	return buf;
}

////////////////////////////////////////////////////////////////////////////////
//
//	Locate the start of a preceding path segment. Starting "back" characters 
//	before "cp", scan left for a '/' and return the character after it, or 
//	"start" if there is no earlier separator. The distance is checked before 
//	any arithmetic so a pointer below "start" is never formed.
//
static char *prevSegmentStart(char *start, char *cp, int back)
{
	if ((cp - start) < back) {
		return start;
	}
	cp -= back;
	while (cp > start && *cp != '/') {
		cp--;
	}
	return (*cp == '/') ? cp + 1 : cp;
}

//
//	Validate the Uri path. The passed in uri is normalized in-situ: repeated 
//	path separators are collapsed, "./" segments are removed and "../" 
//	segments are resolved against the preceding segment. The result is never
//	longer than the input.
//
//	WARNING: this code will not fully validate against certain Windows 95/98/Me
//	bugs. Don't use this code in these operating systems without modifying 
//	this code to remove "con", "nul", "aux", "clock$" and "config$" in either
//	case from the URI. The MprFileSystem::stat() will perform these checks to
//	determine if a file is a device file.
//
//	Adapted from an algorithm in Apache.
//
//	@param uri	Null terminated, writable URI path.
//	@return		uri
//

char *maValidateUri(char *uri)
{
	char	*sp, *dp, *xp, *dot;

	//
	//	Remove multiple path separators and map '\\' to '/' for windows.
	//	The look-ahead character is mapped as well so that a run containing 
	//	backslashes (e.g. "\\\\" or "/\\") is recognised as a repeated 
	//	separator and collapsed.
	//
	sp = dp = uri;
	while (*sp) {
#if WIN || NW || OS2
		if (*sp == '\\') {
			*sp = '/';
		}
		if (sp[1] == '\\') {
			sp[1] = '/';
		}
#endif
		if (sp[0] == '/' && sp[1] == '/') {
			sp++;
		} else {
			*dp++ = *sp++;
		}
	}
	*dp = '\0';

	//
	//	Without a '.' there can be no "." or ".." segments to process
	//
	dot = strchr(uri, '.');
	if (dot == 0) {
		return uri;
	}

	//
	//	Per RFC 1808, remove "./" segments. Nothing before the first '.' can 
	//	change, so the scan starts there.
	//
	dp = dot;
	for (sp = dot; *sp; ) {
		if (*sp == '.' && sp[1] == '/' && (sp == uri || sp[-1] == '/')) {
			sp += 2;
		} else {
			*dp++ = *sp++;
		}
	}
	*dp = '\0';

	//
	//	Remove trailing "." (the uri is exactly "." or it ends with "/.")
	//
	if ((dp == &uri[1] && uri[0] == '.') ||
		(dp > &uri[1] && dp[-1] == '.' && dp[-2] == '/')) {
		*--dp = '\0';
	}

	//
	//	Remove "../" together with the segment that precedes it. After each 
	//	removal the scan resumes at the point where the tail was moved to, so
	//	consecutive "../" segments are resolved in turn. A "../" with no 
	//	preceding segment is simply dropped.
	//
	for (sp = dot; *sp; ) {
		if (*sp == '.' && sp[1] == '.' && sp[2] == '/' && 
			(sp == uri || sp[-1] == '/')) {
			xp = sp + 3;
			sp = prevSegmentStart(uri, sp, 2);
			dp = sp;
			while ((*dp++ = *xp) != 0) {
				xp++;
			}
		} else {
			sp++;
		}
	}
	*dp = '\0';

	//
	//	Remove trailing "/.." together with the segment that precedes it.
	//	sp now points at the terminating null.
	//
	if (sp == &uri[2] && *uri == '.' && uri[1] == '.') {
		*uri = '\0';
	} else if (sp > &uri[2] && sp[-1] == '.' && sp[-2] == '.' && 
			sp[-3] == '/') {
		sp = prevSegmentStart(uri, sp, 4);
		*sp = '\0';
	}
	return uri;
}

////////////////////////////////////////////////////////////////////////////////

//
//	Return the message text for an HTTP status code. The MaHttpErrors table 
//	is searched up to its zero-code sentinel. Codes that are not in the table
//	yield "Custom error".
//
char *maGetHttpErrorMsg(int code)
{
	int		i;

	for (i = 0; MaHttpErrors[i].code != 0; i++) {
		if (MaHttpErrors[i].code == code) {
			return MaHttpErrors[i].msg;
		}
	}
	return "Custom error";
}

////////////////////////////////////////////////////////////////////////////////

//
// Local variables:
// tab-width: 4
// c-basic-offset: 4
// End:
// vim: sw=4 ts=4 
//
